Introduction

SBA - Small Business Profiles for the States and Territories

The Office of Advocacy’s Small Business Profiles are an annual analysis of each state’s small business activities. Each profile gathers the latest information from key federal data-gathering agencies to provide a snapshot of small business health and economic activity. This year’s profiles report on state economic growth and employment; small business employment, industry composition, and turnover; plus business owner demographics and county-level employment change.

https://www.sba.gov/

In [1]:
# `display`/`HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import HTML, IFrame, display

# Widen the notebook's main container to 96% of the browser window so that
# wide tables and plots are not squeezed into the default narrow column.
display(HTML("""<style> .container {width:96% !important;}</style>"""))
In [2]:
import pandas as pd
import multiprocessing
import numpy as np
from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
import math

# Handle s3 or local
import s3fs
from os import listdir
from os.path import isfile, join
import subprocess

# Analysis
from plotly.offline import init_notebook_mode, iplot
import cufflinks as cf
# Set up plotly and cufflinks for offline rendering inside the notebook.
# NOTE(review): described from the function names / library docs; confirm the
# exact behaviour for the installed plotly and cufflinks versions.
init_notebook_mode()
cf.go_offline()

Path to the files

In [3]:
import sys
# Put the parent directory first on the import path so the project-local
# `Tools` package can be imported from this notebook's folder.
sys.path.insert(0,'../')
# NOTE(review): the star import hides which names are pulled in. The
# `path_s3_out`, `path_s3` and `path_local` names used further down are
# presumably defined in Tools.paths - confirm, and prefer explicit imports.
from Tools.paths import *
In [4]:
def list_files(path, ext='pdf'):
    """Return the base names (extension removed) of the files under ``path``.

    Parameters
    ----------
    path : str
        Location to list. A value starting with ``'s3://'`` is listed with
        the ``aws s3 ls`` command line tool; anything else is treated as a
        local directory.
    ext : str, optional
        File extension to keep, without the leading dot (default ``'pdf'``).

    Returns
    -------
    list of str
        Names of the matching files with the trailing ``.<ext>`` stripped.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``aws s3 ls`` call exits with a non-zero status.
    OSError
        If the local directory cannot be listed.
    """
    suffix = '.{}'.format(ext)
    if path.startswith('s3://'):
        # List the location that was actually requested (the previous version
        # ignored `path` and always listed the global `path_s3`).
        listing = subprocess.check_output(['aws', 's3', 'ls', path])
        # check_output returns bytes on Python 3 and str on Python 2.
        if not isinstance(listing, str):
            listing = listing.decode('utf-8')
        # Each listing line ends with the object name; keep the last
        # space-separated field, as before.
        names = [line.split(" ")[-1] for line in listing.splitlines()]
    else:
        # Same fix for the local branch: use `path`, not the global `path_local`.
        names = [f for f in listdir(path) if isfile(join(path, f))]
    # Strip only the trailing extension; str.replace would also remove a
    # '.<ext>' occurring in the middle of a name.
    return [f[:-len(suffix)] for f in names if f.endswith(suffix)]
In [5]:
def path(path, name, ext='pdf'):
    """Build a file location of the form ``<path><name>.<ext>``.

    ``path`` is used as a plain prefix: no separator is inserted between it
    and ``name``, so it must already end with one if one is wanted.

    Parameters
    ----------
    path : str
        Prefix (directory or bucket location).
    name : str
        File name without extension.
    ext : str, optional
        Extension without the leading dot (default ``'pdf'``).

    Returns
    -------
    str
        The concatenated file location.
    """
    template = '{prefix}{stem}.{suffix}'
    return template.format(prefix=path, stem=name, suffix=ext)

Analysis

Industry

In [6]:
# Load the semicolon-separated industry table from the output location.
industry = pd.read_csv(
    path(path_s3_out, name='industry', ext='csv'),
    sep=";",
)
In [7]:
# Keep only the individual industries: rows labelled 'Total' are removed so
# they are not added on top of the per-industry figures later on.
industry = industry.loc[industry['Industry'].ne('Total')]
In [8]:
# Preview the first rows of the industry table after the 'Total' rows were dropped.
industry.head()
Out[8]:
Industry 1-499 Employees 1-19 Employees Nonemployer Firms Total Small Firms State
0 Retail Trade 10674 9627 27992 38666 Alabama.pdf
1 Other Services (except Public Administration) 10042 9332 63575 73617 Alabama.pdf
2 Professional, Scientific, and Technical Services 8081 7378 31099 39180 Alabama.pdf
3 Health Care and Social Assistance 7823 6670 21808 29631 Alabama.pdf
4 Construction 7143 6373 39463 46606 Alabama.pdf
In [9]:
# Sum the employer-firm ('1-499 Employees') and non-employer counts over all
# states, giving one row per industry.
df_aux = (
    industry[['Industry', '1-499 Employees', 'Nonemployer Firms']]
    .groupby('Industry')
    .sum()
)
In [10]:
# Bar chart of the per-industry totals computed above
# (`.iplot` is presumably the DataFrame plotting hook added by cufflinks).
df_aux.iplot(kind = 'bar')

Employment

In [11]:
# Load the semicolon-separated employment table from the output location.
employment = pd.read_csv(
    path(path_s3_out, name='employment', ext='csv'),
    sep=";",
)
In [12]:
# Lookup table from state name ('state') to state code ('code'), taken from a
# public plotly example dataset. It is merged onto the per-state tables below
# to supply the `locations` for the choropleth maps.
# Built as one chained expression: the previous version called
# drop_duplicates(inplace=True) on a column slice (which can trigger pandas'
# SettingWithCopyWarning) and had a mid-cell `df.head()` that displayed nothing.
df = (
    pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')
    [['code', 'state']]
    .drop_duplicates()
)
In [13]:
# Preview the first rows of the employment table.
employment.head()
Out[13]:
Industry Small Business Employment Total Private Employment Small Business Emp Share State
0 Health Care and Social Assistance 113580.0 240549.0 47.2 Alabama.pdf
1 Accommodation and Food Services 89707.0 161421.0 55.6 Alabama.pdf
2 Retail Trade 87257.0 222277.0 39.3 Alabama.pdf
3 Manufacturing 79632.0 242093.0 32.9 Alabama.pdf
4 Other Services (except Public Administration) 68770.0 80073.0 85.9 Alabama.pdf
In [14]:
# Total the numeric employment columns per state (one row per source file).
# numeric_only=True makes the exclusion of the text 'Industry' column explicit:
# older pandas dropped it silently, newer versions would concatenate the
# strings instead and change the resulting shape.
# NOTE(review): the summed 'Small Business Emp Share' column is a sum of
# percentages and is not meaningful; only 'Small Business Employment' is
# plotted below. Also confirm the CSV has no per-state 'Total' row (the
# industry table has one), which would be double counted here.
df_aux = employment.groupby('State').sum(numeric_only=True)
In [15]:
# Move 'State' from the group index back into a regular column so it can be
# cleaned up and used as the merge key in the next cell.
df_aux = df_aux.reset_index()
In [16]:
# Turn the source file name into a state name: underscores become spaces and
# everything from the first '.' on is dropped (e.g. 'Alabama.pdf' -> 'Alabama').
df_aux['State'] = df_aux.State.apply(lambda x: x.replace("_"," ").split(".")[0])
# print() call form: prints the same tuple on Python 2 and is valid on Python 3.
print(df_aux.shape)
# Left join so every state row is kept even when the lookup has no matching
# name; such rows end up with a missing 'code'.
df_aux = df_aux.merge(df, left_on = 'State', right_on = 'state', how = 'left')
# The row count should be unchanged by the join; only the lookup columns are added.
print(df_aux.shape)
(51, 4)
(51, 6)
In [17]:
# Six-stop light-to-dark colour scale, given as [position, colour] pairs.
scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],
       [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]

# Hover text: the state name.
df_aux['text'] = df_aux['State']

# Single choropleth trace: one value per state, located by its state code.
data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = False,
        locations = df_aux['code'],
        z = df_aux['Small Business Employment'].astype(float),
        locationmode = 'USA-states',
        text = df_aux['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        # `z` is a head count, not a dollar amount: the previous
        # "Millions USD" label mislabelled the colour bar.
        colorbar = dict(
            title = "Employees")
        ) ]

layout = dict(
        title = '2016 USA Small Business Employment by State',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )

fig = dict( data=data, layout=layout )
iplot( fig, filename='d3-cloropleth-map' )

Demographics

In [18]:
# Load the semicolon-separated demographic table from the output location.
demographic = pd.read_csv(
    path(path_s3_out, name='demographic', ext='csv'),
    sep=";",
)
In [19]:
# Preview the first rows of the demographic table (one row per state file).
demographic.head()
Out[19]:
Alaskan-owned American-owned Asian-owned Hispanic-owned Islander-owned Minority-owned Nonminority-owned State
0 27.0 28.7 35.4 51.5 -16.9 30.7 -8.6 Alabama.pdf
1 9.3 22.2 41.2 NaN 32.8 16.5 -2.3 Alaska.pdf
2 20.2 52.8 35.2 69.7 NaN 58.8 -7.3 Arizona.pdf
3 15.7 55.3 43.1 46.8 66.3 52.2 -7.4 Arkansas.pdf
4 27.8 33.9 28.6 51.5 21.5 43.9 -2.8 Colorado.pdf
In [20]:
# Work on a copy: a plain assignment only aliases `demographic`, so the
# column edits made to `df_aux` in the following cells would silently modify
# the loaded table that was displayed above.
df_aux = demographic.copy()
In [21]:
# Turn the source file name into a state name: underscores become spaces and
# everything from the first '.' on is dropped (e.g. 'Alabama.pdf' -> 'Alabama').
df_aux['State'] = df_aux.State.apply(lambda x: x.replace("_"," ").split(".")[0])
# print() call form: prints the same tuple on Python 2 and is valid on Python 3.
print(df_aux.shape)
# Left join so every state row is kept even when the lookup has no matching
# name; such rows end up with a missing 'code'.
df_aux = df_aux.merge(df, left_on = 'State', right_on = 'state', how = 'left')
# The row count should be unchanged by the join; only the lookup columns are added.
print(df_aux.shape)
(51, 8)
(51, 10)
In [22]:
# Six-stop light-to-dark colour scale, given as [position, colour] pairs.
scl = [
    [0.0, 'rgb(204,229,255)'],
    [0.2, 'rgb(153,204,255)'],
    [0.4, 'rgb(102,178,255)'],
    [0.6, 'rgb(51,153,255)'],
    [0.8, 'rgb(0,128,255)'],
    [1.0, 'rgb(0,102,204)'],
]

# Hover text: the state name.
df_aux['text'] = df_aux['State']

# Single choropleth trace: the 'Nonminority-owned' value of each state,
# located by its state code.
data = [{
    'type': 'choropleth',
    'colorscale': scl,
    'autocolorscale': False,
    'locations': df_aux['code'],
    'z': df_aux['Nonminority-owned'].astype(float),
    'locationmode': 'USA-states',
    'text': df_aux['text'],
    'marker': {
        'line': {
            'color': 'rgb(255,255,255)',
            'width': 2,
        },
    },
    'colorbar': {
        'title': "%",
    },
}]

# Map restricted to the USA, with lakes drawn in white.
layout = {
    'title': '2016 USA Non-minority changes in business ownership by State',
    'geo': {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
}

fig = {'data': data, 'layout': layout}
iplot(fig, filename='d3-cloropleth-map')